#!/bin/sh
# set -x
#
@copyright_hash@
#
# hpcrun -- set the environ variables for profiling with HPCToolkit
# and launch the program.  See 'hpcrun -h' for a list of options.
#

#------------------------------------------------------------
# Values from configure
#------------------------------------------------------------

@launch_script_vars@

# Name of this launcher.
prog_name='hpcrun'

# Directories given by the install layout.  Relative paths are
# relative to HPCTOOLKIT.
hpcrun_dir='lib/hpctoolkit'
hpcfnbounds_dir='libexec/hpctoolkit'

# Directories filled in by configure-time substitution.
libmonitor_dir='@LIBMONITOR_RUN_DIR@'
libunwind_dir='@LIBUNWIND_RUN_DIR@'
papi_libdir='@OPT_PAPI_LIBPATH@'
perfmon_libdir='@PERFMON_LIB@'

#------------------------------------------------------------
# Find path to this script
#------------------------------------------------------------

# NOTE(review): hpc_path_to_root looks like the relative path from this
# script's directory up to the install root -- confirm.
# NOTE(review): the placeholder on the next line is replaced when the
# script is generated; presumably its expansion uses hpc_path_to_root
# to set and export HPCTOOLKIT, which the path fix-ups below rely on.
# Verify against the build macros.
hpc_path_to_root=..
@export_hpctoolkit@

# Anchor every directory that is not already absolute under HPCTOOLKIT.
# "${var#/}" drops one leading '/', so it equals "$var" exactly when the
# value does not begin with '/'; an empty value therefore counts as
# relative.  perfmon_libdir is not adjusted here.
if test "${hpcfnbounds_dir#/}" = "$hpcfnbounds_dir" ; then
    hpcfnbounds_dir="${HPCTOOLKIT}/${hpcfnbounds_dir}"
fi
if test "${hpcrun_dir#/}" = "$hpcrun_dir" ; then
    hpcrun_dir="${HPCTOOLKIT}/${hpcrun_dir}"
fi
if test "${libmonitor_dir#/}" = "$libmonitor_dir" ; then
    libmonitor_dir="${HPCTOOLKIT}/${libmonitor_dir}"
fi
if test "${libunwind_dir#/}" = "$libunwind_dir" ; then
    libunwind_dir="${HPCTOOLKIT}/${libunwind_dir}"
fi
if test "${papi_libdir#/}" = "$papi_libdir" ; then
    papi_libdir="${HPCTOOLKIT}/${papi_libdir}"
fi

# ext_dir is derived from hpcrun_dir, so it is set only after
# hpcrun_dir has its final value.
ext_dir="${hpcrun_dir}/ext-libs"

#------------------------------------------------------------
# Usage Message
#------------------------------------------------------------

# Print "hpcrun: <message>" (the message is all arguments, "$*") and a
# pointer to 'hpcrun -h' on stderr, then exit with status 1.
die()
{
    printf '%s\n' \
        "hpcrun: $*" \
        "use 'hpcrun -h' for a summary of options" 1>&2
    exit 1
}

# Print the help text on stdout and exit with status 0.
# The here-document delimiter is quoted so the text is emitted
# literally, with no parameter or command expansion.
usage()
{
    cat <<'EOF'
Usage:
  hpcrun [profiling-options] <command> [command-arguments]
  hpcrun [info-options]

hpcrun profiles the execution of an arbitrary command <command> using
statistical sampling (rather than instrumentation).  It collects
per-thread call path profiles that represent the full calling context of
sample points.  Sample points may be generated from multiple simultaneous
sampling sources.  hpcrun profiles complex applications that use forks,
execs, threads, and dynamic linking/unlinking; it may be used in conjunction
with parallel process launchers such as MPICH's mpiexec and SLURM's srun.

To profile a statically linked executable, make sure to link with hpclink.

To configure hpcrun's sampling sources, specify events and periods using
the -e/--event option.  For an event 'e' and period 'p', after every 'p'
instances of 'e', a sample is generated that causes hpcrun to inspect
and record information about the monitored <command>.

When <command> terminates, a profile measurement database will be written to
the directory:
  hpctoolkit-<command>-measurements[-<jobid>]
where <jobid> is a job launcher id that is associated with the execution,
if any.

hpcrun enables a user to abort a process and write the partial profiling
data to disk by sending a signal such as SIGINT (often bound to Ctrl-C).
This can be extremely useful on long-running or misbehaving applications.

Options: Informational
  -l, -L, --list-events
                       List available events. (N.B.: some may not be profilable)
  -V, --version        Print version information.
  -h, --help           Print help.

Options: Profiling (Defaults shown in curly brackets {})
  -e <event>[@<howoften>], --event <event>[@<howoften>]
                      event  may  be an architecture-independent hardware or
                      software event supported by Linux perf, a native hardware
                      counter event, a hardware counter event supported by the
                      PAPI library, a Linux  system timer (CPUTIME and REALTIME),
                      or the operating system interval timer WALLCLOCK.  This option
                      may be given multiple times to profile several events at once.
                      If the value for <howoften> is a number, it will be
                      interpreted as a sample period. For Linux perf events, one
                      may specify a sampling frequency for 'howoften' by writing f
                      before a number.  For instance, to sample an event 100 times
                      per second,  specify  <howoften>  as '@f100'. For Linux perf
                      events, if no value for <howoften> is specified, hpcrun
                      will monitor the event using frequency-based sampling at 300
                      samples/second.

  -c, --count <howoften>
                      Only  available  for  events  managed  by Linux perf. This
                      option specifies a default value for how often to sample. The
                      value for <howoften> may be a number that will be used as a
                      default event period or an f followed by a number, e.g. f100,
                      to specify a default sampling frequency in samples/second.

  -t, --trace          Generate a call path trace in addition to a call
                       path profile.

  -ds, --delay-sampling
                       Delay starting sampling until the application calls
                       hpctoolkit_sampling_start().

  -f <frac>, -fp <frac>, --process-fraction <frac>
                       Measure only a fraction <frac> of the execution's
                       processes.  For each process, enable measurement
                       (of all threads) with probability <frac>; <frac> is a
                       real number (0.10) or a fraction (1/10) between 0 and 1.

  -m <0|1>, --merge-threads <0|1>
                       Merge non-overlapped threads into one virtual thread.
                       This option is to reduce the number of generated
                       profile and trace files as each thread generates its own
                       profile and trace data. The options are:
                       0 : do not merge non-overlapped threads
                       1 : merge non-overlapped threads (default)

  -o <outpath>, --output <outpath>
                       Directory for output data.
                       {hpctoolkit-<command>-measurements[-<jobid>]}

                       Bug: Without a <jobid> or an output option, multiple
                       profiles of the same <command> will be placed in the
                       same output directory.

  -r, --retain-recursion
                       Normally, hpcrun will collapse (simple) recursive call chains
                       to save space and analysis time. This option disables that
                       behavior: all elements of a recursive call chain will be recorded.
                       NOTE: If the user employs the RETCNT sample source, then this
                             option is enabled: RETCNT implies *all* elements of
                             call chains, including recursive elements, are recorded.

NOTES:
* hpcrun uses preloaded shared libraries to initiate profiling.  For this
  reason, it cannot be used to profile setuid programs.
* hpcrun may not be able to profile programs that themselves use preloading.

EOF
    exit 0
}

#------------------------------------------------------------
# Command Line Options
#------------------------------------------------------------

@launch_early_options@

# Check that $1 can serve as the value of an option.  Returns 1 when
# $1 is empty or begins with '-' (it would then be the next option),
# and 0 otherwise.
arg_ok()
{
    if test -z "$1" ; then
        return 1
    fi
    # Stripping a leading '-' changes the string only if one is there.
    if test "${1#-}" != "$1" ; then
        return 1
    fi
    return 0
}

# Process options and export environ variables.  LD_LIBRARY_PATH and
# LD_PRELOAD should be delayed until we launch the program, but the
# others can be set now.
#
# Parsing stops at '--', at the first argument that is not an option,
# or at an empty argument.  What is left in "$@" afterwards is the
# command to profile.  Every option that takes a value validates it
# with arg_ok before consuming it.

preload_list=
HPCRUN_DEBUG_FLAGS=
HPCRUN_EVENT_LIST=

while test "x$1" != x
do
    arg="$1" ; shift
    case "$arg" in

        -md | --monitor-debug )
            export MONITOR_DEBUG=1
            ;;

        -d | --debug )
            export HPCRUN_WAIT=1
            ;;

        -dd | --dynamic-debug )
            arg_ok "$1" || die "missing argument for $arg"
            # May be given several times; flags accumulate.
            export HPCRUN_DEBUG_FLAGS="$HPCRUN_DEBUG_FLAGS $1"
            shift
            ;;

        -h | -help | --help )
            usage
            ;;

        # --------------------------------------------------

        -a | --agent )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_OPT_LUSH_AGENTS="$1"
            shift
            ;;

        # --------------------------------------------------

        -e | --event )
            arg_ok "$1" || die "missing argument for $arg"
            # Events with these name prefixes need an extra library
            # from hpcrun_dir added to the preload list.
            case "$1" in
                GA* )           preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_ga.so" ;;
                IO* )           preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_io.so" ;;
                MEMLEAK* )      preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_memleak.so" ;;
                DATACENTRIC* )  preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_datacentric.so" ;;
                PTHREAD_WAIT* ) preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_pthread.so" ;;
                CPU_GPU_IDLE* ) preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_gpu.so" ;;
                MPI* )          preload_list="${preload_list} ${hpcrun_dir}/libhpcrun_mpi.so" ;;
            esac
            # Events are collected into one space-separated list.
            case "$HPCRUN_EVENT_LIST" in
                '' ) HPCRUN_EVENT_LIST="$1" ;;
                * )  HPCRUN_EVENT_LIST="$HPCRUN_EVENT_LIST $1" ;;
            esac
            shift
            ;;

        -L | -l | --list-events )
            export HPCRUN_EVENT_LIST=LIST
            ;;

        -ds | --delay-sampling )
            export HPCRUN_DELAY_SAMPLING=1
            ;;

        # --------------------------------------------------

        -c | --count )
            # Without this check a bare '-c' would run 'shift' with
            # nothing left, and '-c -t ...' would take '-t' as the count.
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_PERF_COUNT="$1"
            shift
            ;;

        # --------------------------------------------------

        -t | --trace )
            export HPCRUN_TRACE=1
            ;;

        # --------------------------------------------------

        -o | --output )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_OUT_PATH="$1"
            shift
            ;;

        # --------------------------------------------------

        -r | --retain-recursion )
            export HPCRUN_RETAIN_RECURSION=1
            ;;

        # --------------------------------------------------

        -m | --merge-threads )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_MERGE_THREADS="$1"
            shift
            ;;

        # --------------------------------------------------

        -lm | --low-memsize )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_LOW_MEMSIZE="$1"
            shift
            ;;

        -ms | --memsize )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_MEMSIZE="$1"
            shift
            ;;

        # --------------------------------------------------

        -f | -fp | --process-fraction )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_PROCESS_FRACTION="$1"
            shift
            ;;

        -mp | --memleak-prob )
            arg_ok "$1" || die "missing argument for $arg"
            export HPCRUN_MEMLEAK_PROB="$1"
            shift
            ;;

        # --------------------------------------------------

        -- )
            break
            ;;

        -* )
            die "unknown or invalid option: $arg"
            ;;

        * )
            # First non-option word: put it back, it is the command.
            set -- "$arg" "$@"
            break
            ;;
    esac
done

# Supply the default sampling source: WALLCLOCK@5000 when no event was
# requested, and WALLCLOCK@5000 in front of RETCNT when RETCNT is the
# only event.  Any other list is exported unchanged.
if test -z "$HPCRUN_EVENT_LIST" ; then
    HPCRUN_EVENT_LIST='WALLCLOCK@5000'
elif test "$HPCRUN_EVENT_LIST" = RETCNT ; then
    HPCRUN_EVENT_LIST='WALLCLOCK@5000 RETCNT'
fi
export HPCRUN_EVENT_LIST

# A command to run is mandatory.  The one exception is event-list mode
# (HPCRUN_EVENT_LIST is exactly LIST), where /bin/ls is used as the
# command.
if test -z "$1" ; then
    case "$HPCRUN_EVENT_LIST" in
        LIST ) set -- /bin/ls ;;
        * )    die "no command to profile" ;;
    esac
fi

#------------------------------------------------------------
# Pre-Launch Sanity Checks
#------------------------------------------------------------

# Find the command on PATH.  We need to run file and nm on the binary,
# so we need an actual path.
#
# A name that contains '/' is used as given.  Otherwise command becomes
# the first PATH entry holding a regular, executable file of that name.
# Directories are skipped explicitly because they also pass 'test -x',
# and an empty PATH entry is read as the current directory.  If nothing
# matches, command keeps the bare name.

command="$1"
case "$command" in
    */* ) ;;
    * )
        OLDIFS="$IFS"
        IFS=:
        for dir in $PATH ; do
            candidate="${dir:-.}/${command}"
            if test -f "$candidate" && test -x "$candidate" ; then
                command="$candidate"
                break
            fi
        done
        IFS="$OLDIFS"
        ;;
esac

# Sanity checks before launch.  They are skipped entirely when the
# 'file' utility is missing or $command is not an executable path.

file_exists=no
if type file >/dev/null 2>&1 ; then
    file_exists=yes
fi

if test -x "$command" && test "$file_exists" = yes ; then
    cmd_file_out=$(file -L "$command" 2>/dev/null)

    #
    # For dynamic binaries, verify that the application and libhpcrun
    # have the same wordsize, both 32-bit or both 64-bit.
    #
    if printf '%s\n' "$cmd_file_out" \
        | grep -E -i -e 'elf.*dynamic' >/dev/null 2>&1
    then
        appl_bit=$(expr "$cmd_file_out" : '.*ELF.*\([0-9][0-9].bit\)')
        file_out=$(file -L "${hpcrun_dir}/libhpcrun.so")
        hpcrun_bit=$(expr "$file_out" : '.*ELF.*\([0-9][0-9].bit\)')
        if test "$appl_bit" != "$hpcrun_bit" ; then
            echo "hpcrun: cannot profile application: $command" 1>&2
            echo "application is $appl_bit but hpctoolkit is $hpcrun_bit" 1>&2
            exit 1
        fi
    fi

    #
    # For static binaries, verify that hpcrun is linked in.  Use
    # strings instead of nm to handle stripped binaries.  If strings
    # is not installed the check is skipped, like the 'file' checks
    # above; otherwise its absence would be misreported as a binary
    # that lacks libhpcrun.
    #
    if type strings >/dev/null 2>&1 \
        && printf '%s\n' "$cmd_file_out" \
            | grep -E -i -e 'elf.*static' >/dev/null 2>&1
    then
        if strings "$command" 2>&1 | grep -e hpcrun >/dev/null 2>&1 ; then
            :
        else
            echo "hpcrun: static binary is missing libhpcrun: $command" 1>&2
            echo "rebuild the application with hpclink" 1>&2
            exit 1
        fi
    fi
fi

#------------------------------------------------------------
# Final Environ Settings and Exec the Binary
#------------------------------------------------------------

# Disable the darshan I/O library.  This intercepts some I/O functions
# inside signal handlers and can cause deadlock.
export DARSHAN_DISABLE=1

# Add OMP_SKIP_MSB to HPCRUN_DEBUG_FLAGS if the binary contains
# _mp_init.  A failing or missing nm simply leaves the flags alone.
if nm "$command" 2>/dev/null | grep -e ' _mp_init' >/dev/null 2>&1 ; then
    export HPCRUN_DEBUG_FLAGS="$HPCRUN_DEBUG_FLAGS OMP_SKIP_MSB"
fi

# Enable core files.
ulimit -S -c unlimited >/dev/null 2>&1

# Library search path for the toolkit: hpcrun, PAPI, ext-libs and
# perfmon (laks 2016.10.06: add perfmon directory), followed by the
# caller's own LD_LIBRARY_PATH.  Empty components are left out: the
# dynamic loader reads an empty entry (a leading, trailing or doubled
# ':') as the current directory, which must not be added by accident.
hpc_ld_library_path=
for libdir in "$hpcrun_dir" "$papi_libdir" "$ext_dir" "$perfmon_libdir"
do
    test -n "$libdir" || continue
    hpc_ld_library_path="${hpc_ld_library_path:+${hpc_ld_library_path}:}${libdir}"
done

full_ld_library_path="$hpc_ld_library_path"
if test -n "$LD_LIBRARY_PATH" ; then
    full_ld_library_path="${full_ld_library_path:+${full_ld_library_path}:}${LD_LIBRARY_PATH}"
fi

# Preload order: libhpcrun, libmonitor, the per-event libraries chosen
# during option parsing, then whatever the caller already preloads.
preload_list="${libmonitor_dir}/libmonitor.so ${preload_list}"
preload_list="${hpcrun_dir}/libhpcrun.so ${preload_list}"

export HPCRUN_FNBOUNDS_CMD="${hpcfnbounds_dir}/hpcfnbounds"
export LD_LIBRARY_PATH="$full_ld_library_path"
export LD_PRELOAD="${preload_list} ${LD_PRELOAD}"

# Replace this shell with the command to profile.
exec "$@"
